1 Result

2 Setup

pacman::p_load(
  chorddiag,
  circlize,
  glue,
  here,
  janitor,
  lubridate,
  magrittr,
  skimr,
  tidyverse
)

3 Background

Background information: Tidy Tuesday
Data source: Kaggle

3.1 Tidy Tuesday

tt_info

3.2 Data Dictionary

4 Raw Data

Get data and write to local file

# get data
olympics <- tidytuesdayR::tt_load(yr, week = wk) %>% 
  pluck("olympics")

regions <- tidytuesdayR::tt_load(yr, week = wk) %>% 
  pluck("regions")
  
# join on NOC region and write to local file 
write_csv(
  left_join(olympics, regions, by = c("noc" = "NOC")), 
  here("data", glue("data_{yr}_{wk}.csv")))

Read data from local file (d_raw) and create working copy (d)

# raw data
d_raw <- read_csv(
  here("data", glue("data_{yr}_{wk}.csv")),
  col_types = cols(.default = "c"),
  na = c("NA", "NULL", "")
)
# working copy
d <- d_raw

5 Inspection

d
glimpse(d)
## Rows: 271,116
## Columns: 17
## $ id     <chr> "1", "2", "3", "4", "5", "5", "5", "5", "5", "5", "6", "6", "6"…
## $ name   <chr> "A Dijiang", "A Lamusi", "Gunnar Nielsen Aaby", "Edgar Lindenau…
## $ sex    <chr> "M", "M", "M", "M", "F", "F", "F", "F", "F", "F", "M", "M", "M"…
## $ age    <chr> "24", "23", "24", "34", "21", "21", "25", "25", "27", "27", "31…
## $ height <chr> "180", "170", NA, NA, "185", "185", "185", "185", "185", "185",…
## $ weight <chr> "80", "60", NA, NA, "82", "82", "82", "82", "82", "82", "75", "…
## $ team   <chr> "China", "China", "Denmark", "Denmark/Sweden", "Netherlands", "…
## $ noc    <chr> "CHN", "CHN", "DEN", "DEN", "NED", "NED", "NED", "NED", "NED", …
## $ games  <chr> "1992 Summer", "2012 Summer", "1920 Summer", "1900 Summer", "19…
## $ year   <chr> "1992", "2012", "1920", "1900", "1988", "1988", "1992", "1992",…
## $ season <chr> "Summer", "Summer", "Summer", "Summer", "Winter", "Winter", "Wi…
## $ city   <chr> "Barcelona", "London", "Antwerpen", "Paris", "Calgary", "Calgar…
## $ sport  <chr> "Basketball", "Judo", "Football", "Tug-Of-War", "Speed Skating"…
## $ event  <chr> "Basketball Men's Basketball", "Judo Men's Extra-Lightweight", …
## $ medal  <chr> NA, NA, NA, "Gold", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ region <chr> "China", "China", "Denmark", "Denmark", "Netherlands", "Netherl…
## $ notes  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…

6 Coercion

d %<>% mutate(across(c("id", "age", "height", "weight", "year"),  ~ as.numeric(.x)))

7 Exploration

7.1 Athletes

skim(d)
Data summary
Name d
Number of rows 271116
Number of columns 17
_______________________
Column type frequency:
character 12
numeric 5
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
name 0 1.00 2 108 0 134731 0
sex 0 1.00 1 1 0 2 0
team 0 1.00 2 47 0 1184 0
noc 0 1.00 3 3 0 230 0
games 0 1.00 11 11 0 51 0
season 0 1.00 6 6 0 2 0
city 0 1.00 4 22 0 42 0
sport 0 1.00 4 25 0 66 0
event 0 1.00 15 85 0 765 0
medal 231333 0.15 4 6 0 3 0
region 370 1.00 2 32 0 205 0
notes 266077 0.02 5 27 0 21 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
id 0 1.00 68248.9 39022.29 1 34643 68205 102097 135571 ▇▇▇▇▇
age 9474 0.97 25.6 6.39 10 21 24 28 97 ▇▃▁▁▁
height 60171 0.78 175.3 10.52 127 168 175 183 226 ▁▂▇▂▁
weight 62875 0.77 70.7 14.35 25 60 70 79 214 ▃▇▁▁▁
year 0 1.00 1978.4 29.88 1896 1960 1988 2002 2016 ▁▂▃▆▇

7.1.1 Number of Athletes

athlete_game <- d %>% 
  select(season, year, id) %>% 
  distinct() %>% 
  count(season, year)

ggplot(athlete_game, aes(x = year, y = n, color = season)) +
  geom_line() +
  labs(title = "Number of athletes at the Olympic games") +
  scale_y_continuous(name = "Participants") +
  scale_x_continuous(name = "Year")

7.1.2 Sex of Athletes

ggplot(count(d, year, sex), aes(x = as.factor(year), y = n, fill = sex)) +
  ggchicklet::geom_chicklet(position = position_fill(), radius = grid::unit(4, "pt")) +
  scale_y_continuous(labels = scales::percent) +
  scale_x_discrete(name = "Year") +
  labs(title = "Sex of Olympic athletes") +
  theme(
    axis.title.y = element_blank(),
    axis.text.x = element_text(size = 8, angle = 90),
    panel.grid.major = element_blank()
  )

7.1.3 Age of Athletes

The youngest athlete —Dimitrios Loundras— was only 10 years old.

filter(d, age == min(age, na.rm = TRUE))

The oldest athlete —John Quincy Adams Ward— was 97 years old. He competed in the Art Competitions.

filter(d, age == max(age, na.rm = TRUE))
filter(d, sport != "Art Competitions") %>% filter(age == max(age, na.rm = TRUE))
ggplot(d, aes(
  x = fct_rev(as.factor(sport)),
  y = age,
  fill = sport
)) +
  geom_violin(color = NA) +
  scale_fill_manual(values = pal_length(
    c(
      "#5f4690",
      "#1d6996",
      "#38a6a5",
      "#0f8554",
      "#73af48",
      "#edad08",
      "#e17c05",
      "#cc503e",
      "#94346e",
      "#6f4070"
    ),
    length(unique(d$sport))
  )) +
  coord_flip() +
  theme(legend.position = "none", axis.title.y = element_blank())

7.2 Medalists

medalists <- d %>% filter(!is.na(medal))
skim(medalists)
Data summary
Name medalists
Number of rows 39783
Number of columns 17
_______________________
Column type frequency:
character 12
numeric 5
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
name 0 1.00 4 93 0 28202 0
sex 0 1.00 1 1 0 2 0
team 0 1.00 2 47 0 498 0
noc 0 1.00 3 3 0 149 0
games 0 1.00 11 11 0 51 0
season 0 1.00 6 6 0 2 0
city 0 1.00 4 22 0 42 0
sport 0 1.00 4 25 0 66 0
event 0 1.00 15 85 0 756 0
medal 0 1.00 4 6 0 3 0
region 9 1.00 2 27 0 136 0
notes 39238 0.01 7 27 0 11 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
id 0 1.00 69407.1 38849.98 4 36494 68990 103462 135563 ▇▇▇▇▇
age 732 0.98 25.9 5.91 10 22 25 29 73 ▃▇▁▁▁
height 8711 0.78 177.6 10.89 136 170 178 185 223 ▁▃▇▂▁
weight 9327 0.77 73.8 15.02 28 63 73 83 182 ▂▇▂▁▁
year 0 1.00 1973.9 33.82 1896 1952 1984 2002 2016 ▂▂▃▆▇

7.3 Country Stats

7.3.1 Total Medals

Important note: this is just an approximation, because data are at athlete-level and not country-level. Thus, team sports are overrepresented, as a single winning event is assigned to multiple athletes.

Total number of medals per country from 1896 to 2016

medal_count <- d %>% 
  group_by(region) %>% 
  count(medal) %>% 
  mutate(medal = str_to_lower(medal)) %>% 
  pivot_wider(names_from = medal, values_from = n) %>% 
  select(-`NA`) %>% 
  mutate(across(c("gold", "silver", "bronze"), ~replace_na(.x, 0))) %>% 
  mutate(total = gold + silver + bronze) %>% 
  select(region, gold, silver, bronze, total) %>% 
  ungroup()

medal_count

7.3.2 Most Medals

Top 25 countries with most olympic medals
(not very informative —not taking number of athletes and times participated into account)

medal_count %>% 
  slice_max(order_by = total, n = 25) %>% 
  rowid_to_column("rank")

7.3.3 Times Participated

Number of games in which countries have participated

times_participated <- d %>% 
  select(region, games) %>% 
  distinct() %>% 
  group_by(region) %>% 
  count() %>% 
  arrange(desc(n)) %>% 
  rename(events = n)

times_participated

7.3.4 Win Percentage

Number of medalists (any color) divided by number of participants per country per year

win_ratio <- left_join(
    count(d, region, year),
    count(medalists, region, year), 
    by = c("region", "year")) %>% 
  arrange(desc(year)) %>% 
  rename(participants = n.x, medalists = n.y) %>% 
  mutate(medalists = replace_na(medalists, 0)) %>%
  mutate(ratio = formattable::percent(medalists / participants))

win_ratio %>% 
  select(-medalists, -participants) %>% 
  pivot_wider(names_from = year, values_from = ratio)

7.3.5 Highest Win Percentage

Country with highest medalists/participants ratio per year

win_ratio %>% group_by(year) %>% filter(ratio == max(ratio)) %>% select(year, region, ratio, medalists, participants)

7.3.6 No Medals

69 participating countries are still awaiting their first medal.

medal_count %>% filter(total == 0) %>% pull(region)
##  [1] "Albania"                          "American Samoa"                  
##  [3] "Andorra"                          "Angola"                          
##  [5] "Antigua"                          "Aruba"                           
##  [7] "Bangladesh"                       "Belize"                          
##  [9] "Benin"                            "Bhutan"                          
## [11] "Boliva"                           "Bosnia and Herzegovina"          
## [13] "Brunei"                           "Burkina Faso"                    
## [15] "Cambodia"                         "Cape Verde"                      
## [17] "Cayman Islands"                   "Central African Republic"        
## [19] "Chad"                             "Comoros"                         
## [21] "Cook Islands"                     "Democratic Republic of the Congo"
## [23] "Dominica"                         "El Salvador"                     
## [25] "Equatorial Guinea"                "Gambia"                          
## [27] "Guam"                             "Guinea"                          
## [29] "Guinea-Bissau"                    "Honduras"                        
## [31] "Kiribati"                         "Laos"                            
## [33] "Lesotho"                          "Liberia"                         
## [35] "Libya"                            "Madagascar"                      
## [37] "Malawi"                           "Maldives"                        
## [39] "Mali"                             "Malta"                           
## [41] "Marshall Islands"                 "Mauritania"                      
## [43] "Micronesia"                       "Myanmar"                         
## [45] "Nauru"                            "Nicaragua"                       
## [47] "Oman"                             "Palau"                           
## [49] "Palestine"                        "Papua New Guinea"                
## [51] "Republic of Congo"                "Rwanda"                          
## [53] "Saint Kitts"                      "Saint Lucia"                     
## [55] "Saint Vincent"                    "Samoa"                           
## [57] "San Marino"                       "Sao Tome and Principe"           
## [59] "Seychelles"                       "Sierra Leone"                    
## [61] "Solomon Islands"                  "Somalia"                         
## [63] "South Sudan"                      "Swaziland"                       
## [65] "Timor-Leste"                      "Turkmenistan"                    
## [67] "Vanuatu"                          "Virgin Islands, British"         
## [69] "Yemen"

7.3.7 Summer/Winter

both_seasons <-
  d %>% 
  select(region, season) %>% 
  distinct() %>% 
  group_by(region) %>% 
  count() %>% 
  filter(n == 2) %>% 
  pull(region)

n_winter_medals <- medalists %>% filter(season == "Winter") %>% count() %>% pull(n)
n_summer_medals <- medalists %>% filter(season == "Summer") %>% count() %>% pull(n) 

medals_season <- medalists %>% 
  filter(region %in% both_seasons) %>% 
  group_by(region, season) %>% 
  count() %>% 
  mutate(prop_total = case_when(
    season == "Winter" ~ n / n_winter_medals,
    season == "Summer" ~ n / n_summer_medals
  ))

7.4 Sports Stats

Unique sports

sports_winter <- d %>% 
  filter(season == "Winter") %>% 
  pull(sport) %>% 
  unique() %>% 
  sort()
sports_winter
##  [1] "Alpine Skiing"             "Alpinism"                 
##  [3] "Biathlon"                  "Bobsleigh"                
##  [5] "Cross Country Skiing"      "Curling"                  
##  [7] "Figure Skating"            "Freestyle Skiing"         
##  [9] "Ice Hockey"                "Luge"                     
## [11] "Military Ski Patrol"       "Nordic Combined"          
## [13] "Short Track Speed Skating" "Skeleton"                 
## [15] "Ski Jumping"               "Snowboarding"             
## [17] "Speed Skating"
sports_summer <- d %>% 
  filter(season == "Summer") %>% 
  pull(sport) %>% 
  unique() %>% 
  sort()
sports_summer
##  [1] "Aeronautics"           "Alpinism"              "Archery"              
##  [4] "Art Competitions"      "Athletics"             "Badminton"            
##  [7] "Baseball"              "Basketball"            "Basque Pelota"        
## [10] "Beach Volleyball"      "Boxing"                "Canoeing"             
## [13] "Cricket"               "Croquet"               "Cycling"              
## [16] "Diving"                "Equestrianism"         "Fencing"              
## [19] "Figure Skating"        "Football"              "Golf"                 
## [22] "Gymnastics"            "Handball"              "Hockey"               
## [25] "Ice Hockey"            "Jeu De Paume"          "Judo"                 
## [28] "Lacrosse"              "Modern Pentathlon"     "Motorboating"         
## [31] "Polo"                  "Racquets"              "Rhythmic Gymnastics"  
## [34] "Roque"                 "Rowing"                "Rugby"                
## [37] "Rugby Sevens"          "Sailing"               "Shooting"             
## [40] "Softball"              "Swimming"              "Synchronized Swimming"
## [43] "Table Tennis"          "Taekwondo"             "Tennis"               
## [46] "Trampolining"          "Triathlon"             "Tug-Of-War"           
## [49] "Volleyball"            "Water Polo"            "Weightlifting"        
## [52] "Wrestling"

Sports that have been part of both the summer and winter Olympics

base::intersect(sports_summer, sports_winter)
## [1] "Alpinism"       "Figure Skating" "Ice Hockey"

Times a sport has been on the Olympic schedule, incl. first and last year

sport_stats <- d %>% group_by(sport) %>% summarise(
  first = min(year),
  last = max(year),
  times = n_unique(year)
)

Sports that have been on the Olympic schedule only once

sport_stats %>% 
  filter(times == 1) %>% 
  select(-times, -first) %>% 
  rename(year = last) %>% 
  arrange(desc(year))

Number of unique sports and events

sport_count <- d %>% 
  select(year, season, sport) %>% 
  distinct() %>% 
  group_by(year, season) %>% 
  count()

event_count <- d %>% 
  select(year, season, event) %>% 
  distinct() %>% 
  group_by(year, season) %>% 
  count()

ggplot(sport_count, aes(x = year, y = n, color = season)) +
  geom_line() +
  labs(title = "Unique sports at the Olympic games") +
  scale_y_continuous(name = "Sports") +
  scale_x_continuous(name = "Year") +
  scale_fill_discrete(name = "Season")

ggplot(event_count, aes(x = year, y = n, color = season)) +
  geom_line() +
  labs(title = "Unique events at the Olympic games") +
  scale_y_continuous(name = "Events") +
  scale_x_continuous(name = "Year") +
  scale_fill_discrete(name = "Season")

7.4.1 Male Legends

Male athlete(s) with most medals (any color) per sport, only shown if ≥3 medals

medalists %>% 
  filter(sex == "M") %>% 
  count(id, name, sport) %>% 
  group_by(sport) %>% 
  filter(n == max(n) & n >= 3) %>% 
  select(-id) %>% 
  group_by(sport) %>% 
  mutate(nr = row_number()) %>% 
  pivot_wider(names_from = nr, values_from = name) %>% 
  unite("athletes", 3:ncol(.), sep = ", ", na.rm = TRUE) %>% 
  rename(medals = n) %>% 
  arrange(sport)

7.4.2 Female Legends

Female athlete(s) with most medals (any color) per sport, only shown if ≥3 medals

medalists %>% 
  filter(sex == "F") %>% 
  count(id, name, sport) %>% 
  group_by(sport) %>% 
  filter(n == max(n) & n >= 3) %>% 
  select(-id) %>% 
  group_by(sport) %>% 
  mutate(nr = row_number()) %>% 
  pivot_wider(names_from = nr, values_from = name) %>% 
  unite("athletes", 3:ncol(.), sep = ", ", na.rm = TRUE) %>% 
  rename(medals = n) %>% 
  arrange(sport)

7.5 Individual Stats

Top 10 male athletes with most olympic medals

medalists %>% 
  filter(sex == "M") %>% 
  count(name) %>% 
  slice_max(order_by = n, n = 10) %>% 
  rename(medals = n) %>% 
  rowid_to_column("rank")

Top 10 female athletes with most olympic medals

medalists %>% 
  filter(sex == "F") %>% 
  count(name) %>% 
  slice_max(order_by = n, n = 10) %>% 
  rename(medals = n) %>% 
  rowid_to_column("rank")

7.6 Multipotentialites

With the term “multipotentialite” I refer to athletes with olympic medals in multiple sports.

Filter athletes with medals in >1 sport

multipotentialite_id <- medalists %>% 
  group_by(id) %>% 
  count(sport) %>% 
  count(id) %>% 
  filter(n > 1) %>% 
  pull(id)

# number of multipotentialites
n_unique(multipotentialite_id)
## [1] 86
# select multipotentialites among medalists
multipotentialite <- filter(medalists, id %in% multipotentialite_id)
multipotentialite

8 Chord Diagram

I decided to create a chord diagram of the crossover between sports by multipotentialites.

8.1 Data Preparation

8.1.1 Pivot Data

Pivot wider (1 row per athlete)

multipotentialite_pivot <- multipotentialite %>% 
  arrange(year) %>% 
  select(id, name, sport) %>% 
  distinct() %>% 
  group_by(id, name) %>% 
  mutate(nr = paste0("sport", row_number())) %>% 
  ungroup() %>% 
  pivot_wider(names_from = nr, values_from = sport)

multipotentialite_pivot

8.1.2 Expand Grid

Expand grid for non-directional/symmetrical chord (A to B == B to A). I decided to keep the chord non-directional, because in some cases (medals won during the same event) it wasn’t clear which medal came first.

bidirectional <- multipotentialite_pivot %>%
  rowwise() %>%
  mutate(grid = list(expand_grid(
      source = c(sport1, sport2, sport3),
      target = c(sport1, sport2, sport3)) %>%
    filter(source != target))) %>%
  select(-starts_with("sport")) %>%
  unnest(grid)

bidirectional

8.1.3 Unique Sports

# unique sports
sports <- medalists %>% 
  filter(id %in% multipotentialite_id) %>% 
  pull(sport) %>% 
  sort() %>% 
  unique()

sports
##  [1] "Art Competitions"          "Athletics"                
##  [3] "Beach Volleyball"          "Biathlon"                 
##  [5] "Bobsleigh"                 "Boxing"                   
##  [7] "Cross Country Skiing"      "Cycling"                  
##  [9] "Diving"                    "Equestrianism"            
## [11] "Fencing"                   "Football"                 
## [13] "Gymnastics"                "Handball"                 
## [15] "Hockey"                    "Luge"                     
## [17] "Modern Pentathlon"         "Nordic Combined"          
## [19] "Polo"                      "Rowing"                   
## [21] "Rugby"                     "Sailing"                  
## [23] "Shooting"                  "Short Track Speed Skating"
## [25] "Skeleton"                  "Ski Jumping"              
## [27] "Speed Skating"             "Swimming"                 
## [29] "Tennis"                    "Tug-Of-War"               
## [31] "Volleyball"                "Water Polo"               
## [33] "Weightlifting"             "Wrestling"
# number of unique sports
nsports <- length(sports)
nsports
## [1] 34

8.1.4 Create Matrix

Create matrix with all options

m <- left_join(
  expand_grid(source = sports, target = sports), 
  count(bidirectional, source, target),
  by = c("source", "target")) %>%
  mutate(n = replace_na(n, 0)) %>%
  pivot_wider(names_from = target, values_from = n) %>%
  column_to_rownames(var = "source") %>% 
  as.matrix(
    nrow = length(sports), 
    ncol = length(sports))

8.1.5 Set Colors

Color only sports with an frequency greater than 5

# find sports with frequency > 5
colorsport <- bidirectional %>% 
  count(source) %>% 
  filter(n > 5) %>%
  pull(source)

# create color vector with base color
colors <- rep("#e3ded3", times = nsports) 

# add names to color vector
names(colors) <- sports 

# specify manual palette
manual_pal_1 <-
  pal_length(
    c(
      "#da3238",
      "#d43f85",
      "#5d2e91",
      "#3656a4",
      "#107fb8",
      "#3a9e8d",
      "#77bb52"
    ),
    length(colorsport)
  )

# replace base color with color from palette for selected sports
colors[which(sports %in% colorsport)] <- manual_pal_1

# show colors
colorspace::swatchplot(colors)

# try different palettes
manual_pal_2 <- pal_length(
  c(
    "#edad08",
    "#e17c05",
    "#cc503e",
    "#94346e",
    "#6f4070",
    "#5f4690",
    "#1d6996",
    "#38a6a5",
    "#0f8554",
    "#73af48"
  ),
  length(colorsport)
)
colors[which(sports %in% colorsport)] <- manual_pal_2
colorspace::swatchplot(colors)

manual_pal_3 <- pal_length(
  c(
    "#b53471",
    "#b3438e",
    "#aa55aa",
    "#9b66c1",
    "#8777d4",
    "#6d87e2",
    "#4c96e9",
    "#1ba3ea",
    "#00afe6",
    "#00b9dd",
    "#00c3d2",
    "#12cbc4"
  ),
  length(colorsport
  )
)
colors[which(sports %in% colorsport)] <- manual_pal_3
colorspace::swatchplot(colors)

# choose palette #1
colors[which(sports %in% colorsport)] <- manual_pal_1

8.2 Draw Chord

I decided to plot the chord diagram with the circlize package in R.

chordDiagram(
  m,
  symmetric = TRUE,
  grid.col = colors,
  order = colnames(m)
)

This result wasn’t exactly what I had in mind (apologies for not fixing the ugly labels).

The chord diagram is non-directional due to the symmetric nature of the data. Therefore, coloring according to only one of the two sports feels inappropriate. An alternative is to create gradient chords based on the colors of both the source and target sport —instead of only using the source color.

I read a great blog post by Nadieh Bremer on how to achieve this with SVG in D3. Unfortunately, I haven’t figured out a way to do this with R yet. Therefore, I decided to continue the visualization in an Observable notebook using SVG and D3.js.

Link: Observable notebook

8.3 Write Arrays

To use the data in Observable, I formatted the data as javascript arrays (in plain text) for copy-pasting into an Observable notebook.

# main matrix
arr <- "["
for(i in 1:nsports){
  a <- paste0("[", paste(as.character(m[i, ]), collapse = ','), "]" )
  b <- ifelse(i == length(sports), "]", ",")
  arr <- paste0(arr, a, b)
}

# links
write_file(
  arr,
  here("code", "2021_31", "arrays", "links.txt"))
## [[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0],[0,0,0,0,1,0,0,0,0,0,0,0,2,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,1,0],[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0],[0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],[0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0],[0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],[0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0],[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,1,0,0],[0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,2,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0],[0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],[0,2,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,2,0,0,0,0,0,0,3,0,0,0,1],[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0],[0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],[0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],[0,1,0,0,0,0,0,0,0,0,2,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0],[0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],[0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],[0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0],[0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0],[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0],[1,0,0,0,0,0,0,0,0,0,1,0,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0],[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0],[0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0],[0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0],[1,0,0,0,0,0,0,0,5,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,23,0,0],[0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],[0,1,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,3,2],[0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],[0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,23,0,0,0,0,0,0],[0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,3,0,0,0,1],[0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0]]
# sports
write_file(
  paste0('["', paste(sports, collapse='","'), '"]'),
  here("code", "2021_31", "arrays", "nodes.txt"))
## ["Art Competitions","Athletics","Beach Volleyball","Biathlon","Bobsleigh","Boxing","Cross Country Skiing","Cycling","Diving","Equestrianism","Fencing","Football","Gymnastics","Handball","Hockey","Luge","Modern Pentathlon","Nordic Combined","Polo","Rowing","Rugby","Sailing","Shooting","Short Track Speed Skating","Skeleton","Ski Jumping","Speed Skating","Swimming","Tennis","Tug-Of-War","Volleyball","Water Polo","Weightlifting","Wrestling"]
# colors
write_file(
  paste0('["', paste(colors, collapse='","'), '"]'),
  here("code", "2021_31", "arrays", "colors.txt"))
## ["#e3ded3","#da3238","#e3ded3","#e3ded3","#d63962","#e3ded3","#c93d86","#e3ded3","#88348c","#e3ded3","#553594","#e3ded3","#404b9e","#e3ded3","#e3ded3","#e3ded3","#e3ded3","#2b61a9","#e3ded3","#e3ded3","#e3ded3","#e3ded3","#1677b4","#e3ded3","#e3ded3","#e3ded3","#e3ded3","#1f8aa8","#e3ded3","#369b90","#e3ded3","#55ab72","#77bb52","#e3ded3"]

8.4 Observable Plot

Session Info

sessionInfo()
## R version 4.0.5 (2021-03-31)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/nl_NL.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] rvest_1.0.0      skimr_2.1.3      lubridate_1.7.10 janitor_2.1.0   
##  [5] here_1.0.1       glue_1.4.2       circlize_0.4.13  chorddiag_0.1.3 
##  [9] magrittr_2.0.1   forcats_0.5.1    stringr_1.4.0    dplyr_1.0.7     
## [13] purrr_0.3.4      readr_1.4.0      tidyr_1.1.3      tibble_3.1.3    
## [17] ggplot2_3.3.5    tidyverse_1.3.1 
## 
## loaded via a namespace (and not attached):
##  [1] httr_1.4.2          sass_0.4.0          jsonlite_1.7.2     
##  [4] modelr_0.1.8        bslib_0.2.5.1       assertthat_0.2.1   
##  [7] highr_0.9           selectr_0.4-2       cellranger_1.1.0   
## [10] yaml_2.2.1          gdtools_0.2.3       Rttf2pt1_1.3.8     
## [13] pillar_1.6.1        backports_1.2.1     extrafontdb_1.0    
## [16] digest_0.6.27       snakecase_0.11.0    colorspace_2.0-2   
## [19] htmltools_0.5.1.1   hrbrthemes_0.8.0    pkgconfig_2.0.3    
## [22] broom_0.7.8         haven_2.4.1         scales_1.1.1       
## [25] farver_2.1.0        generics_0.1.0      ellipsis_0.3.2     
## [28] pacman_0.5.1        withr_2.4.2         repr_1.1.3         
## [31] cli_3.0.1           crayon_1.4.1        readxl_1.3.1       
## [34] evaluate_0.14       fs_1.5.0            fansi_0.5.0        
## [37] xml2_1.3.2          tools_4.0.5         hms_1.1.0          
## [40] GlobalOptions_0.1.2 lifecycle_1.0.0     ggchicklet_0.5.2   
## [43] munsell_0.5.0       reprex_2.0.0        formattable_0.2.1  
## [46] compiler_4.0.5      jquerylib_0.1.4     systemfonts_1.0.2  
## [49] rlang_0.4.11        grid_4.0.5          rstudioapi_0.13    
## [52] htmlwidgets_1.5.3   labeling_0.4.2      base64enc_0.1-3    
## [55] rmarkdown_2.9       gtable_0.3.0        DBI_1.1.1          
## [58] curl_4.3.2          R6_2.5.0            knitr_1.33         
## [61] extrafont_0.17      utf8_1.2.2          rprojroot_2.0.2    
## [64] shape_1.4.6         stringi_1.7.2       Rcpp_1.0.7         
## [67] vctrs_0.3.8         png_0.1-7           dbplyr_2.1.1       
## [70] tidyselect_1.1.1    xfun_0.24